from lxml import etree
from scrapy.selector import Selector
# Sample HTML document queried by the XPath examples in this script: a table
# with a single row of six <td> cells (classes yrx1..yrx6), followed by an <a>
# element that sits outside the table.
text = """
<html>
<table class='tab'>
    <tr class = 'yrx'>
        <td class = 'yrx1'>yrx1</td>
        <td class = 'yrx2'>yrx2</td>
        <td class = 'yrx3'>yrx3</td>
        <td class = 'yrx4'>yrx4</td>
        <td class = 'yrx5'>平哥</td>
        <td class = 'yrx6'>yrx6</td>
    </tr>
</table>
<a>这是table外的标签</a>
</html>
"""
# --- Reference: the same lookup done with lxml directly -------------------
# Convert the string into an lxml HTML element tree, then query it.
# html = etree.HTML(text)
#
# Serialize the first matching <td> element back to markup:
# result = html.xpath("//td[@class='yrx5']")[0]
# print(etree.tostring(result, encoding='utf8').decode())
#
# Or fetch only its text node:
# result = html.xpath("//td[@class='yrx5']/text()")[0]
# print(result)

# Wrap the markup in a Scrapy Selector; every live query below goes through it.
html = Selector(text=text)

# --- Reference: other query styles on the Selector ------------------------
# Text of a single cell:
# result = html.xpath("//td[@class='yrx5']/text()").get()
# print(result)
#
# Axes (following / preceding): text of the <td> nodes after the yrx3 cell.
# The axis must be followed directly by a node test, i.e. `following::td`,
# not `following::/td`.
# res = html.xpath("//td[@class='yrx3']/following::td/text()").getall()
#
# Regular-expression predicate via the EXSLT `re` namespace.
# NOTE(review): parsel's documentation demonstrates `re:test` for boolean
# regex predicates -- confirm `re:match` behaves as intended here.
# res = html.xpath("//td[re:match(@class,'yrx')]").getall()

# Combining `and` with `not()`: select every <td> whose class attribute
# contains 'yrx', except the one whose class is exactly 'yrx5'.
_kept_cells = html.xpath(
    "//td[contains(@class,'yrx') and not(@class='yrx5')]"
)
res = _kept_cells.getall()
print(res)